Data manipulation with tidyverse

The tidyverse is an opinionated collection of R packages designed for data science. All packages share an underlying design philosophy, grammar, and data structures.

dplyr is a grammar of data manipulation, providing a consistent set of verbs that help you solve the most common data manipulation challenges.

1 load package

Code
library(tidyverse)
Code
packageVersion("dplyr")
[1] '1.1.4'
Code
data(mtcars)
small_mtcars = mtcars %>% select(cyl, mpg,hp) %>% head()

convert the row names into a column

Code
library(tibble)
small_mtcars=rownames_to_column(small_mtcars, var="car_name")

small_mtcars %>% head
           car_name cyl  mpg  hp
1         Mazda RX4   6 21.0 110
2     Mazda RX4 Wag   6 21.0 110
3        Datsun 710   4 22.8  93
4    Hornet 4 Drive   6 21.4 110
5 Hornet Sportabout   8 18.7 175
6           Valiant   6 18.1 105

2 Data manipulation

2.1 select column

2.2 get column names

Code
names(small_mtcars)
[1] "car_name" "cyl"      "mpg"      "hp"      

2.3 select by name

Code
mtcars %>% select(cyl, mpg,hp) 
                    cyl  mpg  hp
Mazda RX4             6 21.0 110
Mazda RX4 Wag         6 21.0 110
Datsun 710            4 22.8  93
Hornet 4 Drive        6 21.4 110
Hornet Sportabout     8 18.7 175
Valiant               6 18.1 105
Duster 360            8 14.3 245
Merc 240D             4 24.4  62
Merc 230              4 22.8  95
Merc 280              6 19.2 123
Merc 280C             6 17.8 123
Merc 450SE            8 16.4 180
Merc 450SL            8 17.3 180
Merc 450SLC           8 15.2 180
Cadillac Fleetwood    8 10.4 205
Lincoln Continental   8 10.4 215
Chrysler Imperial     8 14.7 230
Fiat 128              4 32.4  66
Honda Civic           4 30.4  52
Toyota Corolla        4 33.9  65
Toyota Corona         4 21.5  97
Dodge Challenger      8 15.5 150
AMC Javelin           8 15.2 150
Camaro Z28            8 13.3 245
Pontiac Firebird      8 19.2 175
Fiat X1-9             4 27.3  66
Porsche 914-2         4 26.0  91
Lotus Europa          4 30.4 113
Ford Pantera L        8 15.8 264
Ferrari Dino          6 19.7 175
Maserati Bora         8 15.0 335
Volvo 142E            4 21.4 109

2.4 select columns by name match with ‘p’

2.5 select columns by index

2.5.1 select first and 3rd columns

2.5.2 select first to 3rd columns

2.6 drop column

Code
small_mtcars %>% select(-cyl)
           car_name  mpg  hp
1         Mazda RX4 21.0 110
2     Mazda RX4 Wag 21.0 110
3        Datsun 710 22.8  93
4    Hornet 4 Drive 21.4 110
5 Hornet Sportabout 18.7 175
6           Valiant 18.1 105

2.7 Renaming column

Code
small_mtcars %>%rename(new_cyl=cyl)
           car_name new_cyl  mpg  hp
1         Mazda RX4       6 21.0 110
2     Mazda RX4 Wag       6 21.0 110
3        Datsun 710       4 22.8  93
4    Hornet 4 Drive       6 21.4 110
5 Hornet Sportabout       8 18.7 175
6           Valiant       6 18.1 105

2.8 Create column

2.8.1 Mutate

Code
small_mtcars %>%mutate(new_cyl=cyl+1)
           car_name cyl  mpg  hp new_cyl
1         Mazda RX4   6 21.0 110       7
2     Mazda RX4 Wag   6 21.0 110       7
3        Datsun 710   4 22.8  93       5
4    Hornet 4 Drive   6 21.4 110       7
5 Hornet Sportabout   8 18.7 175       9
6           Valiant   6 18.1 105       7

2.8.2 if else

Code
small_mtcars %>%mutate(new_cly_group=if_else(cyl>6,'big','small'))
           car_name cyl  mpg  hp new_cly_group
1         Mazda RX4   6 21.0 110         small
2     Mazda RX4 Wag   6 21.0 110         small
3        Datsun 710   4 22.8  93         small
4    Hornet 4 Drive   6 21.4 110         small
5 Hornet Sportabout   8 18.7 175           big
6           Valiant   6 18.1 105         small

2.8.3 case when

Code
small_mtcars %>%mutate(cly_group=case_when(
    cyl > 6 ~ "very big",
    cyl > 4 ~ "big",
    TRUE ~ "other",
  ))
           car_name cyl  mpg  hp cly_group
1         Mazda RX4   6 21.0 110       big
2     Mazda RX4 Wag   6 21.0 110       big
3        Datsun 710   4 22.8  93     other
4    Hornet 4 Drive   6 21.4 110       big
5 Hornet Sportabout   8 18.7 175  very big
6           Valiant   6 18.1 105       big

2.8.4 Transmute: create a column and keep only that column

Code
small_mtcars %>%transmute(new_cyl=cyl+1)
  new_cyl
1       7
2       7
3       5
4       7
5       9
6       7

2.9 Filter rows

Code
small_mtcars %>%filter(cyl>5)
           car_name cyl  mpg  hp
1         Mazda RX4   6 21.0 110
2     Mazda RX4 Wag   6 21.0 110
3    Hornet 4 Drive   6 21.4 110
4 Hornet Sportabout   8 18.7 175
5           Valiant   6 18.1 105

2.9.1 Filters with AND conditions

Code
small_mtcars %>%filter(cyl>5,mpg>20)
        car_name cyl  mpg  hp
1      Mazda RX4   6 21.0 110
2  Mazda RX4 Wag   6 21.0 110
3 Hornet 4 Drive   6 21.4 110

2.9.2 Filters with OR conditions

Code
small_mtcars %>%filter(cyl>5|mpg>20)
           car_name cyl  mpg  hp
1         Mazda RX4   6 21.0 110
2     Mazda RX4 Wag   6 21.0 110
3        Datsun 710   4 22.8  93
4    Hornet 4 Drive   6 21.4 110
5 Hornet Sportabout   8 18.7 175
6           Valiant   6 18.1 105

2.9.3 filter row with index

2.9.3.1 5th rows

Code
small_mtcars %>% slice(5)
           car_name cyl  mpg  hp
1 Hornet Sportabout   8 18.7 175

2.9.3.2 1st to 5th rows

Code
small_mtcars %>% slice(1:5)
           car_name cyl  mpg  hp
1         Mazda RX4   6 21.0 110
2     Mazda RX4 Wag   6 21.0 110
3        Datsun 710   4 22.8  93
4    Hornet 4 Drive   6 21.4 110
5 Hornet Sportabout   8 18.7 175

2.9.3.3 get 5 random rows

Code
small_mtcars %>% sample_n(5)
           car_name cyl  mpg  hp
1    Hornet 4 Drive   6 21.4 110
2     Mazda RX4 Wag   6 21.0 110
3         Mazda RX4   6 21.0 110
4 Hornet Sportabout   8 18.7 175
5        Datsun 710   4 22.8  93

2.10 Append

2.10.1 append by row

Code
small_mtcars %>% rbind(small_mtcars)
            car_name cyl  mpg  hp
1          Mazda RX4   6 21.0 110
2      Mazda RX4 Wag   6 21.0 110
3         Datsun 710   4 22.8  93
4     Hornet 4 Drive   6 21.4 110
5  Hornet Sportabout   8 18.7 175
6            Valiant   6 18.1 105
7          Mazda RX4   6 21.0 110
8      Mazda RX4 Wag   6 21.0 110
9         Datsun 710   4 22.8  93
10    Hornet 4 Drive   6 21.4 110
11 Hornet Sportabout   8 18.7 175
12           Valiant   6 18.1 105

2.10.2 append by column

Code
small_mtcars %>% cbind(small_mtcars)
           car_name cyl  mpg  hp          car_name cyl  mpg  hp
1         Mazda RX4   6 21.0 110         Mazda RX4   6 21.0 110
2     Mazda RX4 Wag   6 21.0 110     Mazda RX4 Wag   6 21.0 110
3        Datsun 710   4 22.8  93        Datsun 710   4 22.8  93
4    Hornet 4 Drive   6 21.4 110    Hornet 4 Drive   6 21.4 110
5 Hornet Sportabout   8 18.7 175 Hornet Sportabout   8 18.7 175
6           Valiant   6 18.1 105           Valiant   6 18.1 105

2.10.3 Special values

2.10.3.1 NAN

NaN (“Not a Number”) is the result of an undefined numeric operation such as 0/0. Note that is.na() also returns TRUE for NaN.

Code
v1 <- NaN
v1
[1] NaN
Code
is.na(v1)
[1] TRUE

2.10.3.2 NA

NA (“Not Available”) is generally interpreted as a missing value

Code
v2 <- NA
v2
[1] NA
Code
is.na(v2)
[1] TRUE

2.10.3.3 NULL

NULL is an object that is returned when an expression or function results in an undefined value. In the R language, NULL (all capital letters) is a reserved word.

Code
v3=NULL
v3
NULL
Code
is.na(v3)
logical(0)

2.11 group by

2.11.1 average,min,max,sum

Code
small_mtcars %>%group_by(cyl) %>% summarise(avg_mpg=mean(mpg)
                                            ,min_mpg=min(mpg)
                                            ,max_mpg=max(mpg)
                                            ,sum_mpg=sum(mpg))
# A tibble: 3 × 5
    cyl avg_mpg min_mpg max_mpg sum_mpg
  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
1     4    22.8    22.8    22.8    22.8
2     6    20.4    18.1    21.4    81.5
3     8    18.7    18.7    18.7    18.7

2.11.2 count record and count distinct record

Code
small_mtcars %>%group_by(cyl) %>% summarise(n_mpg=n()
                                            ,distinct_n_mpg=n_distinct(mpg)
                                            
                                            )
# A tibble: 3 × 3
    cyl n_mpg distinct_n_mpg
  <dbl> <int>          <int>
1     4     1              1
2     6     4              3
3     8     1              1

2.12 order rows

Code
small_mtcars %>%arrange(cyl) 
           car_name cyl  mpg  hp
1        Datsun 710   4 22.8  93
2         Mazda RX4   6 21.0 110
3     Mazda RX4 Wag   6 21.0 110
4    Hornet 4 Drive   6 21.4 110
5           Valiant   6 18.1 105
6 Hornet Sportabout   8 18.7 175

2.12.1 Sort in descending order

Code
small_mtcars %>%arrange(desc(cyl) )
           car_name cyl  mpg  hp
1 Hornet Sportabout   8 18.7 175
2         Mazda RX4   6 21.0 110
3     Mazda RX4 Wag   6 21.0 110
4    Hornet 4 Drive   6 21.4 110
5           Valiant   6 18.1 105
6        Datsun 710   4 22.8  93

2.12.2 Arrange by multiple variables

Code
small_mtcars %>%arrange(cyl,mpg)
           car_name cyl  mpg  hp
1        Datsun 710   4 22.8  93
2           Valiant   6 18.1 105
3         Mazda RX4   6 21.0 110
4     Mazda RX4 Wag   6 21.0 110
5    Hornet 4 Drive   6 21.4 110
6 Hornet Sportabout   8 18.7 175

2.13 join

Code
left_data=small_mtcars %>% slice(1:2)
right_data=small_mtcars %>% slice(2:4)
Code
left_data
       car_name cyl mpg  hp
1     Mazda RX4   6  21 110
2 Mazda RX4 Wag   6  21 110
Code
right_data
        car_name cyl  mpg  hp
1  Mazda RX4 Wag   6 21.0 110
2     Datsun 710   4 22.8  93
3 Hornet 4 Drive   6 21.4 110

2.13.1 inner_join

Code
data=left_data %>% inner_join(right_data,join_by(car_name== car_name), suffix = c("_l", "._r"))
data
       car_name cyl_l mpg_l hp_l cyl._r mpg._r hp._r
1 Mazda RX4 Wag     6    21  110      6     21   110

2.13.2 left join

Code
data=left_data %>% left_join(right_data,join_by(car_name== car_name), suffix = c("_l", "._r"))
data
       car_name cyl_l mpg_l hp_l cyl._r mpg._r hp._r
1     Mazda RX4     6    21  110     NA     NA    NA
2 Mazda RX4 Wag     6    21  110      6     21   110

2.13.3 full join

Code
data=left_data %>% full_join(right_data,join_by(car_name== car_name), suffix = c("_l", "._r"))
data
        car_name cyl_l mpg_l hp_l cyl._r mpg._r hp._r
1      Mazda RX4     6    21  110     NA     NA    NA
2  Mazda RX4 Wag     6    21  110      6   21.0   110
3     Datsun 710    NA    NA   NA      4   22.8    93
4 Hornet 4 Drive    NA    NA   NA      6   21.4   110

2.13.4 anti join

anti_join() returns all rows from x without a match in y

Code
data=left_data %>% anti_join(right_data,join_by(car_name== car_name))
data
   car_name cyl mpg  hp
1 Mazda RX4   6  21 110

2.14 Reshape tables

Code
olddata_wide <- read.table(header=TRUE, text='
 subject sex control cond1 cond2
       1   M     7.9  12.3  10.7
       2   F     6.3  10.6  11.1
       3   F     9.5  13.1  13.8
       4   M    11.5  13.4  12.9
')
Code
olddata_wide
  subject sex control cond1 cond2
1       1   M     7.9  12.3  10.7
2       2   F     6.3  10.6  11.1
3       3   F     9.5  13.1  13.8
4       4   M    11.5  13.4  12.9

2.14.1 Gather data long(wide to long)

Code
data_long=olddata_wide %>%
  pivot_longer(!c(subject,sex), names_to = 'income', values_to = 'DATA')

data_long
# A tibble: 12 × 4
   subject sex   income   DATA
     <int> <chr> <chr>   <dbl>
 1       1 M     control   7.9
 2       1 M     cond1    12.3
 3       1 M     cond2    10.7
 4       2 F     control   6.3
 5       2 F     cond1    10.6
 6       2 F     cond2    11.1
 7       3 F     control   9.5
 8       3 F     cond1    13.1
 9       3 F     cond2    13.8
10       4 M     control  11.5
11       4 M     cond1    13.4
12       4 M     cond2    12.9

2.14.2 Spread data wide (long to wide)

Code
data_wide=data_long %>%
  pivot_wider(names_from = income, values_from = DATA)

data_wide
# A tibble: 4 × 5
  subject sex   control cond1 cond2
    <int> <chr>   <dbl> <dbl> <dbl>
1       1 M         7.9  12.3  10.7
2       2 F         6.3  10.6  11.1
3       3 F         9.5  13.1  13.8
4       4 M        11.5  13.4  12.9

3 string

stringr is built on top of stringi, which uses the ICU C library to provide fast, correct implementations of common string manipulations.

3.1 length

Code
x <- "I like horses."
str_length(x)
[1] 14

3.2 upper case

Code
x <- "I like horses."

str_to_upper(x)
[1] "I LIKE HORSES."

3.3 lower case

Code
x <- "I like horses."

str_to_lower(x)
[1] "i like horses."

3.4 match

Code
word_list=c('abc','bbc','appale','bbaa','cc')

3.4.1 match with ‘a’

Code
word_list %>% str_detect('a')
[1]  TRUE FALSE  TRUE  TRUE FALSE

3.4.2 match with ‘a’ count

Code
word_list %>% str_count('a')
[1] 1 0 2 2 0

3.4.3 index of vector match

Code
word_list %>% str_which('a')
[1] 1 3 4

3.4.4 index of each word match (first match)

Code
word_list %>% str_locate('a')
     start end
[1,]     1   1
[2,]    NA  NA
[3,]     1   1
[4,]     3   3
[5,]    NA  NA

3.4.5 index of each word match (all match)

Code
word_list %>% str_locate_all('a')
[[1]]
     start end
[1,]     1   1

[[2]]
     start end

[[3]]
     start end
[1,]     1   1
[2,]     4   4

[[4]]
     start end
[1,]     3   3
[2,]     4   4

[[5]]
     start end
Code
trx='abc1993'

num=str_match(trx, "(\\d)+")

num
     [,1]   [,2]
[1,] "1993" "3" 

3.5 concatenation

Code
a='aaaa'
b='bbbb'
Code
paste(a,b)
[1] "aaaa bbbb"
Code
paste0(a,b)
[1] "aaaabbbb"

3.6 replace string

3.6.1 str_replace()

Code
text001="abcb"
text001 %>% str_replace('b','1')
[1] "a1cb"

3.6.2 str_replace_all()

Code
text001="abcb"
text001 %>% str_replace_all('b','1')
[1] "a1c1"

3.6.3 replace with regular expression

Code
word2=c('a-b','M6D5','M6D54','M6D55','M6D5') %>% as_tibble()
word2
# A tibble: 5 × 1
  value
  <chr>
1 a-b  
2 M6D5 
3 M6D54
4 M6D55
5 M6D5 

replace ‘D’ followed by any digits with ‘_’

Code
word2 %>% mutate(new=value %>% str_replace_all('D\\d*','_'))
# A tibble: 5 × 2
  value new  
  <chr> <chr>
1 a-b   a-b  
2 M6D5  M6_  
3 M6D54 M6_  
4 M6D55 M6_  
5 M6D5  M6_  

3.7 split string

Code
word=c('a-b','1-c','c-c')

df_word=word %>% as.data.frame() %>% rename('word'='.')

3.7.1 make 2 columns by splitting on ‘-’ using the stringr package

Code
df_word
  word
1  a-b
2  1-c
3  c-c
Code
library(stringr)
df_word$word %>% str_split_fixed('-',2)
     [,1] [,2]
[1,] "a"  "b" 
[2,] "1"  "c" 
[3,] "c"  "c" 

3.7.2 make 2 columns by splitting on ‘-’ using the tidyr package

Code
df_word
  word
1  a-b
2  1-c
3  c-c
Code
library(tidyr)
df_word %>% separate(word,c('col1','col2'),'-')
  col1 col2
1    a    b
2    1    c
3    c    c

3.8 subset element in list

Code
word=c('aabbbasdf','apple','pet','melon')

3.8.1 word with ‘a’

Code
word %>% str_subset('a')
[1] "aabbbasdf" "apple"    

3.8.2 word with ‘a,e,i,o,u’

Code
word %>% str_subset("[aeiou]")
[1] "aabbbasdf" "apple"     "pet"       "melon"    

3.8.3 word with ‘pet’ or ‘melon’

Code
word %>% str_subset('pet|melon')
[1] "pet"   "melon"

3.8.4 word with ‘aa’

Code
word %>% str_subset('aa')
[1] "aabbbasdf"

3.9 extract string

Code
data001=mtcars
data001 <- cbind(names = rownames(data001), data001)

3.9.1 by position

extract characters 2 to 4

Code
data001$new_names=data001$names %>% str_sub(2,4)
head(data001 %>% select(new_names,names))
                  new_names             names
Mazda RX4               azd         Mazda RX4
Mazda RX4 Wag           azd     Mazda RX4 Wag
Datsun 710              ats        Datsun 710
Hornet 4 Drive          orn    Hornet 4 Drive
Hornet Sportabout       orn Hornet Sportabout
Valiant                 ali           Valiant

3.9.2 extracting with Regular expressions

extracting the first word character (a letter, digit, or underscore)

Code
trx='abc1993 ccc'
trx %>% str_extract("\\w")
[1] "a"

extracting the first character of any kind (an unescaped . matches any single character)

Code
trx='abc1993 ccc'
trx %>% str_extract(".")
[1] "a"

extracting a literal ‘.’ (the dot must be escaped)

Code
trx='abc1993.ccc'
trx %>% str_extract("\\.")
[1] "."

example data for the extractions below

Code
word=c('aabbbasdfe. e','appl.e e','pet','melon','asdf g 133asd') %>% as_tibble()
word
# A tibble: 5 × 1
  value        
  <chr>        
1 aabbbasdfe. e
2 appl.e e     
3 pet          
4 melon        
5 asdf g 133asd

extracting 3 characters starting with ‘a’

Code
word %>%mutate(new=value %>% str_extract("a.."))
# A tibble: 5 × 2
  value         new  
  <chr>         <chr>
1 aabbbasdfe. e aab  
2 appl.e e      app  
3 pet           <NA> 
4 melon         <NA> 
5 asdf g 133asd asd  

extracting everything from the first ‘a’ to the end of the string

Code
word %>%mutate(new=value %>% str_extract("a.+"))
# A tibble: 5 × 2
  value         new          
  <chr>         <chr>        
1 aabbbasdfe. e aabbbasdfe. e
2 appl.e e      appl.e e     
3 pet           <NA>         
4 melon         <NA>         
5 asdf g 133asd asdf g 133asd

extracting from the first ‘a’ up to and including the next ‘e’

Code
word %>%mutate(new=value %>% str_extract("(a).*?(e)"))
# A tibble: 5 × 2
  value         new       
  <chr>         <chr>     
1 aabbbasdfe. e aabbbasdfe
2 appl.e e      appl.e    
3 pet           <NA>      
4 melon         <NA>      
5 asdf g 133asd <NA>      

extracting from the first ‘a’ up to and including the next ‘.’

Code
word %>%mutate(new=value %>% str_extract("(a).*?(\\.)"))
# A tibble: 5 × 2
  value         new        
  <chr>         <chr>      
1 aabbbasdfe. e aabbbasdfe.
2 appl.e e      appl.      
3 pet           <NA>       
4 melon         <NA>       
5 asdf g 133asd <NA>       

extracting from the first ‘a’ up to and including the next space

Code
word %>%mutate(new=value %>% str_extract("(a).*?( )"))
# A tibble: 5 × 2
  value         new           
  <chr>         <chr>         
1 aabbbasdfe. e "aabbbasdfe. "
2 appl.e e      "appl.e "     
3 pet            <NA>         
4 melon          <NA>         
5 asdf g 133asd "asdf "       

extracting everything up to and including the first digit (new), then dropping its last two characters (new2)

Code
word %>%mutate(new=value %>% str_extract(".*?\\d")
               , new2=new %>% str_sub(end=-3)
                                         
)
# A tibble: 5 × 3
  value         new      new2  
  <chr>         <chr>    <chr> 
1 aabbbasdfe. e <NA>     <NA>  
2 appl.e e      <NA>     <NA>  
3 pet           <NA>     <NA>  
4 melon         <NA>     <NA>  
5 asdf g 133asd asdf g 1 asdf g

extracting the first digit

Code
trx='abc1993 ccc'
trx %>% str_extract("\\d")
[1] "1"

extracting the first run of consecutive digits

Code
trx='abc1993 ccc'
trx %>% str_extract("(\\d)+")
[1] "1993"

extracting all runs of non-digit characters

Code
trx='abc1993 ccc'
trx %>% str_extract_all("\\D+")
[[1]]
[1] "abc"  " ccc"

extracting the first run of letters

Code
trx='abc1993 ccc'
trx %>% str_extract("[:alpha:]+")
[1] "abc"

example data: strings that may contain ‘points:’ followed by a number

Code
trx=c('abcpoints:100 ccc','asdfasd','points:66','thisis points:6')
trx
[1] "abcpoints:100 ccc" "asdfasd"           "points:66"        
[4] "thisis points:6"  

extracting ‘points:’ together with the number that follows it, and removing non-matches

Code
t=trx %>% str_extract("points:[:digit:]+") %>% na.omit()%>% str_extract("points:[:digit:]+")
t
[1] "points:100" "points:66"  "points:6"  

3.10 Regular expressions

Code
phones=c('abba','124','anna')
phones
[1] "abba" "124"  "anna"

3.10.1 get a+ (b or n) + (b or n) + a

Code
phones %>% str_view('a[bn][bn]a')
[1] │ <abba>
[3] │ <anna>

3.10.2 bb or nn

Code
phones %>% str_view('(bb|nn)')
[1] │ a<bb>a
[3] │ a<nn>a

4 date

using lubridate package to handle date and time in R

Code
library(tidyverse)
library(lubridate)
library(nycflights13)

4.1 date format

input as character

Code
date1='2023-01-01'
class(date1)
[1] "character"
Code
date1
[1] "2023-01-01"

convert into date type with as.Date()

Code
date2=as.Date('2023-01-01')
class(date2)
[1] "Date"
Code
date2
[1] "2023-01-01"

convert into date type with ymd()

Code
date3=ymd('2023-01-01')
class(date3)
[1] "Date"
Code
date3
[1] "2023-01-01"

get today with today()

Code
today()
[1] "2025-02-21"

get local timezone

Code
Sys.timezone()
[1] "Asia/Shanghai"

4.2 change date format

combine separate year, month, and day columns into a date with make_date()

Code
flights %>% 
  select(year, month, day, hour, minute) %>% 
  mutate(departure = make_date(year, month, day))
# A tibble: 336,776 × 6
    year month   day  hour minute departure 
   <int> <int> <int> <dbl>  <dbl> <date>    
 1  2013     1     1     5     15 2013-01-01
 2  2013     1     1     5     29 2013-01-01
 3  2013     1     1     5     40 2013-01-01
 4  2013     1     1     5     45 2013-01-01
 5  2013     1     1     6      0 2013-01-01
 6  2013     1     1     5     58 2013-01-01
 7  2013     1     1     6      0 2013-01-01
 8  2013     1     1     6      0 2013-01-01
 9  2013     1     1     6      0 2013-01-01
10  2013     1     1     6      0 2013-01-01
# ℹ 336,766 more rows
Code
flights %>% 
  select(year, month, day, hour, minute) %>% 
  mutate(departure = make_datetime(year, month, day, hour, minute))
# A tibble: 336,776 × 6
    year month   day  hour minute departure          
   <int> <int> <int> <dbl>  <dbl> <dttm>             
 1  2013     1     1     5     15 2013-01-01 05:15:00
 2  2013     1     1     5     29 2013-01-01 05:29:00
 3  2013     1     1     5     40 2013-01-01 05:40:00
 4  2013     1     1     5     45 2013-01-01 05:45:00
 5  2013     1     1     6      0 2013-01-01 06:00:00
 6  2013     1     1     5     58 2013-01-01 05:58:00
 7  2013     1     1     6      0 2013-01-01 06:00:00
 8  2013     1     1     6      0 2013-01-01 06:00:00
 9  2013     1     1     6      0 2013-01-01 06:00:00
10  2013     1     1     6      0 2013-01-01 06:00:00
# ℹ 336,766 more rows

4.3 day difference between two dates

Code
day1=ymd('2022-01-01')
day2=ymd('2023-02-03')

diff=day2-day1
Code
diff
Time difference of 398 days

using interval() to find the gap between two dates

Code
interval(day1,day2) %>% as.period()
[1] "1y 1m 2d 0H 0M 0S"

find day gap

Code
interval(day1,day2)%/% days(1)
[1] 398

find month gap

Code
interval(day1,day2)%/% months(1)
[1] 13

find year gap

Code
interval(day1,day2)%/% years(1)
[1] 1

4.4 day and time

Code
now_time=now()
now_time
[1] "2025-02-21 02:11:03 CST"

4.4.1 get year

Code
year(now_time)
[1] 2025

4.4.2 get month

Code
month(now_time)
[1] 2

4.4.3 get day of the month

Code
mday(now_time)
[1] 21

4.4.4 get day of the year

Code
yday(now_time)
[1] 52

4.4.5 get day of the week (1 = Sunday by default)

Code
wday(now_time)
[1] 6

4.4.5.1 get hour

Code
hour(now_time)
[1] 2

4.4.6 get minute

Code
minute(now_time)
[1] 11

4.4.7 get second

Code
second(now_time)
[1] 3.341459

4.5 dataframe to other data format

4.5.1 dataframe to vector

Code
data=small_mtcars$cyl
data
[1] 6 6 4 6 8 6
Code
class(data)
[1] "numeric"

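4.5.2 dataframe to matrix

data.matrix() returns a numeric matrix, so the character column car_name is converted to integer codes rather than kept as text.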

Code
data=data.matrix(small_mtcars)
data
     car_name cyl  mpg  hp
[1,]        4   6 21.0 110
[2,]        5   6 21.0 110
[3,]        1   4 22.8  93
[4,]        2   6 21.4 110
[5,]        3   8 18.7 175
[6,]        6   6 18.1 105
Code
class(data)
[1] "matrix" "array" 

4.5.3 dataframe to list

Code
data=as.list(small_mtcars)
data
$car_name
[1] "Mazda RX4"         "Mazda RX4 Wag"     "Datsun 710"       
[4] "Hornet 4 Drive"    "Hornet Sportabout" "Valiant"          

$cyl
[1] 6 6 4 6 8 6

$mpg
[1] 21.0 21.0 22.8 21.4 18.7 18.1

$hp
[1] 110 110  93 110 175 105
Code
class(data)
[1] "list"

5 reference:

https://dplyr.tidyverse.org/

https://evoldyn.gitlab.io/evomics-2018/ref-sheets/R_strings.pdf

https://www.youtube.com/watch?v=3Aki_sQYQG0

https://www.r-bloggers.com/2018/07/r-null-values-null-na-nan-inf

Back to top